{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 决策树原理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = np.loadtxt('x.txt')\n",
    "X = X[:, 2:]\n",
    "y = np.loadtxt('y.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGkNJREFUeJzt3X9sXWd9x/HP1/faY6YsrYgFbdrY1QZICVAgVlcEQt3cTi0ttNsQPxTGWkAeNmxUMG1AJFArRWLSYCDA6bzWaYs92NYCa2nZRgMSIH4MJyuUJEUqXdMfMGpgS+mMlsT57o9z09jX5/o+955zz6/7fklX8Xn8nPN8b1C/nDzP9zzH3F0AgGoZyDsAAED6SO4AUEEkdwCoIJI7AFQQyR0AKojkDgAVRHIHgAoiuQNABZHcAaCC6nkNvHnzZh8bG8treAAopf379//M3Ufa9cstuY+NjWlxcTGv4QGglMzsSEg/pmUAoIJI7gBQQSR3AKggkjsAVBDJHQAqiOQOABVEcgeACmqb3M3sPDP7qpkdMrODZvbumD4Xm9lRM7uv8flgb8IFUGYLC9LYmDQwEP25sJD8/KTXrKqQh5hOSHqvux8ws2dJ2m9mX3b3Q039vu7uV6YfIoAqWFiQJiel5eXo+MiR6FiSdu7s7vxrr5XMpGPHurtmlbW9c3f3n7j7gcbPv5R0WNKWXgcGoFp27TqdmE9ZXo7auz3/+PHTib2ba1ZZR3PuZjYm6aWSvhPz65eb2ffM7Etmtr3F+ZNmtmhmi0tLSx0HC6C8Hnmks/Zu+3Xat6qCk7uZnSHpDknXufuTTb8+IGnU3S+Q9AlJX4i7hrvPuvu4u4+PjLTd9wZAhWzd2ll7t/067VtVQcndzAYVJfYFd/9c8+/d/Ul3f6rx8z2SBs1sc6qRAii13bul4eG1bcPDUXu35w8OSkND3V+zykKqZUzSzZIOu/tHW/R5bqOfzOzCxnV/nmagAMpt505pdlYaHY0WQUdHo+PQhc+48/fulebmur9mlZm7b9zB7JWSvi7pfkknG80fkLRVktz9RjN7l6QpRZU1v5L0Hnf/5kbXHR8fd7b8BYDOmNl+dx9v1y+kWuYb7m7u/mJ3f0njc4+73+juNzb6fNLdt7v7Be5+UbvEDiBfWdSGT09L9Xp0R12vR8fITm4v6wCQj6T15iGmp6U9e04fr6ycPp6ZSWcMbKzttEyvMC0D5GNsLErozUZHpYcfTmeMej1K6M1qNenEiXTG6FepTcsAqJak9eYh4hL7Ru1IH8kd6DNJ681D1GqdtSN9JHegzyStNw9xag4/tB3pI7kDfSZpvXmImRlpaur0nXqtFh2zmJodFlQBoERYUAWAPkZyB/pQ6Esv0n45RifnhvYtw8s6conR3XP57NixwwFkb37efXjYXTr9GRx0Hxpa2zY0FLW36zc8HF2zm3FbnRvat5Nr5iXtGCUtekCOZc4d6DOtHmJKIuQBqE4engrtm8UDWUmlHWPonDvJHegzAwPR/WOazKSTJzfu02rcuHND+3ZyzbykHSMLqgBi9eJFFiHX7OThqdC+WTyQlVReMZLcgT4T+tKLoaGovV2/0AegOnl4KrRvFg9kJZVbjCET8734sKAK5Gd+3n101N0s+nN+PllbknGT9k0ST1bSjFEsqAJA9TDnDiBXVapTj1P0uHlZB4DUhb4QJIsXh/RCGeJmWgZA6qpUpx4nz7iZlgGQm9AXgmTx4pBeKEPcJHcAqatSnXqcMsRNcgeQuirVqccpQ9wkdwCpC30hSBYvDumFMsTNgioAlAgLqgB6Iqu94ItUR16kWIKFPMbaiw/bDwDlE7c3eei+73H9ku7nntd3znPPeLH9AIC0ZbUXfJHq34sUi8S0DIAe6EUdd9w1i1RHXqRYOkFyBxAsq73gi1RHXqRYOkFyBxAsrr47dN/3uH5J93PPQpFi6QTJHUCwuPruuTlp7961bXv3Ru3t+rWqDS9SHXmRYulE2wVVMztP0m2SniPJJc26+8eb+pikj0t6taRlSde4+4GNrsuCKgB0Ls0F1ROS3uvu2yRdJOmdZratqc/lkp7X+ExK2tNhvAASalWL3Ysa9KpI8p0L//cVUi+5+iPpnyVd2tT2t5LetOr4h5LO3ug61LkD6WlViz011X1dep613FlIUr+eZ+27elHnbmZjkr4m6YXu/uSq9i9K+rC7f6NxvE/SX7p7y3kXpmWA9LSqxa7VpJWV7q9b9H3Vk0hSv16p/dzN7AxJd0i6bnVi7zCoSTNbNLPFpaWlbi4BIEarmuskiX2j61ZBkvr1MtS+ByV3MxtUlNgX3P1zMV0el3TequNzG21ruPusu4+7+/jIyEg38QKI0armulbrzXWrIEn9ehlq39sm90YlzM2SDrv7R1t0u1PSWyxykaSj7v6TFOMEsIFWtdiTk93XpZehljuJJPXrpah9bzcpL+mVikogvy/pvsbn1ZLeIekdjT4m6VOSfiTpfknj7a7LgiqQrvl599FRd7Poz1OLe3HtoW1Vl+Q75/X3JTYOA4DqYeMwAJLi67Gnp6V6PXrisl6PjkPPLZoyxJiHet4BAOidhYVo3n15OTo+ckS65hrpxInTfVZWpD2Nxw5nZjY+d3Iy+rkoj96XIca8MC0DVFgn+6/XamuTftH2MY9ThhjTxrQMgI7qrptr4stQy12GGPNCcgcqrJO66+aa+DLUcpchxryQ3IEKi6vHrrdYaTs1V73RuUWr5S5DjHkhuQMVFrcX+S23SFNTp+/Ua7XoePViaqtzi7aPeRlizAsLqgBQIiyoAhV2yRsekNVOyMxltRO65A0PBNeuS+nXhseNHTpGJ7FUev/1tIU8xtqLD9sPAN2ZeP1hl06u2Us8Om5ui/Zzb5b2XuRTU75uXMl9YKD9GJ3EUtb919Mmth8AqslqJ6STYc8fNteuS+nXhtfr4VsLN4/RSSxl3X89baHTMiR3oGTMXNFefWGa/xMfGFjfFl1XOnmym3g667t6jE5iSRJ32t85T8y5A1U1EP4Gjrj93NOuDe9kz/jmMTqJper7r6eN5A6UzMTrHlS0C/dqHtO2vnZdSr82PG4MKbpbbjdGJ7FUfv/1tIVMzPfiw4Iq0L2J1x92DRyPFlEHjvvE6w/71JR7rRYtFtZq8Yupp6S9F3nc2KFjdBJLGfdfT5tYUAWA6mHOHSi4rOquF+5f0NjHxjRw/YDGPjamhfurXuANif3cgVxktQ/5wv0LmrxrUsvHo4GOHD2iybuigXa+iGf0q4xpGSAHWdVdj31sTEeOrh9odNOoHr4uxYGQGaZlgALLah/yR47GX7BVO6qD5A7kIKu6662b4i/Yqh3VQXIHcpBV3fXuid0aHlw70PDgsHZPVLnAGxLJHchFVvuQ73zRTs2+Zlajm0ZlMo1uGtXsa2ZZTO0DLKgCQImwoAoUXGj9eS/q1ItU+953+6xnhDp3IAeh9ee9qFMvUu17VvX+/YhpGSAHofXnvahTL1Lte5X2Wc8K0zJAgYXWn/eiTr1Ite9Z1fv3I5I7kIPQ+vNe1KkXqfa9H/dZzwrJHchBaP15L+rUi1T73pf7rGeE5A7kILT+vBd16kWqfc+q3r8fsaAKACWS2oKqmc2Z2RNm9oMWv7/YzI6a2X2Nzwe7CRgooyT14ls+skV2vT392fKRLbHXCx1j+u5p1W+oy6431W+oa/ru6SjGmDpyasurr+2du5m9StJTkm5z9xfG/P5iSX/u7ld2MjB37ii75npxKZq7Dpni2PKRLfrxUz9uO8bgwKDMTMdWjm04xvTd09qzuGfd+RO/vFnf2vPWp+vIJWlwMJoCOXb6khoeZjqkLFK7c3f3r0n6RSpRARWya9+uNYldkpaPL2vXvl1tzw1J7JJ0/OTxNYm91Riz+2djz99308SaxC5Jx4+vTexS9BDRrvZho0TSWlB9uZl9z8y+ZGbbW3Uys0kzWzSzxaWlpZSGBvKRZ7148xgrvhLf8eh54dektrxS0kjuBySNuvsFkj4h6QutOrr7rLuPu/v4yMhICkMD+cmzXrx5jJrV4jtuejT8mtSWV0ri5O7uT7r7U42f75E0aGabE0cGFFySevFzzjgnaIzBgUEN1YbajjG5YzL2/Im371tXRz44KA2tvSS15RWUOLmb2XPNzBo/X9i45s+TXhcouiT14o+/9/F1Cf6cM87R/B/Mr7ne3qv3au6qubZjzFwxo6nxqafv4GtW09T4lO7967euqyPfu1eam6O2vOpCqmU+I+liSZsl/VTShyQNSpK732hm75I0JemEpF9Jeo+7f7PdwFTLAEDn0qyWeZO7n+3ug+5+rrvf7O43uvuNjd9/0t23u/sF7n5RSGIHiiKrfc1b1aCHxBN3bpH2Y2+FWvp88YQq+laSOvVOtKpBnxqf0swVMxvGUx+o68TJE+vObW7vRdxJNO/TLlFLn5bQO3eSO/pWVvua12+ox5Yq1qymEx88naBbxRMqj/3YW2Gf9t5hP3egjazq1FvVoDe3Jx03j/3YW2Gf9vyR3NG3sqpTb1WD3tyedNw89mNvhX3a80dyR9/Kal/zVjXoze1x8dQH4l9z3Nye137srbBPe/5I7uhbWe1r3qoGffViaqt4brn6lthzb7n6lkLsx94K+7TnjwVVACgRFlQBoI/FT+gBJbJw/4J27dulR44+oq2btmr3xO5EUxSX3HaJ9v3nvqePJ86f0POf/XzN7p/Viq+oZjVN7pjUzBUzmr57el27pHVtr9j6inUxSgpqK9J0C8qDaRmUWtoPIjUn9o1s27xNh352KKhvzWprSh+HakNydx0/efzpttAXc6C/8RAT+kLaDyLZ9ZZCVOkq0sNJyB9z7ugLeb4wIytV+i7IDskdpZbnCzOyUqXvguyQ3FFqaT+INHH+RHDfbZu3Bfdtfhp1qDakwYHBNW2hL+YAQpDcUWppP4h071vuXZfgJ86fiH2Q6OA7D8a2x7Xd+vu3rolx7qo57b16b1cv5gBCsKAKACXCgioQIO6lF528CCO0b5KXa5ThxRwoHu7c0bfiauQ7qTUPrbFPUouf1QtFUB7UuQNtdPJyjLha89Aa+yS1+Fm9UATlwbQM0EYn9eNxfUNr7JPU4vdDHT96g+SOvtVJ/Xhc39Aa+yS1+P1Qx4/eILmjb8XVyHdSax5aY5+kFj+rF4qgekju6FtxNfKd1JqH1tgnqcXP6oUiqB4WVAGgRFhQRabKUIudtKYdKBPu3JFYGWqx42KM21O9aHEDzbhzR2Z27du1JmlK0vLxZe3atyuniNaLi/HYyrE1iV0qXtxAt0juSKwMtdhJa9qBsiG5I7Ey1GInrWkHyobkjsTKUIsdF2PcnupFixvoFskdiZWhFjsuxrg91YsWN9CtttUyZjYn6UpJT7j7C2N+b5I+LunVkpYlXePuB9oNTLUMAHQuzWqZWyRdtsHvL5f0vMZnUtKekACB1abvnlb9hrrselP9hrqm755O1C/t/dOph0fZ1Nt1cPevmdnYBl2uknSbR/8E+LaZnWlmZ7v7T1KKERU3ffe09iyevidY8ZWnj2eumOm4X3NN+5GjRzR516Qkdbx/+pGjR3TtF65ds8d7J9cD8pLGnPsWSY+uOn6s0QYEmd0/G9Qe2i9J3X3cucdPHl/z8o5OrgfkJdMFVTObNLNFM1tcWlrKcmgU2IqvBLWH9uvF/ulJ+wJZSyO5Py7pvFXH5zba1nH3WXcfd/fxkZGRFIZGFdSsFtQe2q8X+6cn7QtkLY3kfqekt1jkIklHmW9HJyZ3TAa1h/ZLe//0TvZ4B4qibXI3s89I+pakF5jZY2b2NjN7h5m9o9HlHkkPSXpQ0t9Jii9fAFqYuWJGU+NTT9+B16ymqfGpNYuknfRLe//0TvZ4B4qCXSEBoETYFRIA+hjJHQAqiOQOABVEcgeACiK5A0AFkdwBoIJI7gBQQSR3AKggkjsAVBDJHQAqiOQOABVEcgeACiK5A0AFkdwBoIJI7gBQQSR3AKggkjsAVBDJHQAqiOQOABVEcgeACiK5A0AFkdwBoIJI7gBQQSR3AKggknsnFhaksTFpYCD6c2Eh74gAIFY97wBKY2FBmpyUlpej4yNHomNJ2rkzv7gAIAZ37qF27Tqd2E9ZXo7aAaBgSO6hHnmks3YAyBHJPdTWrZ21A0COSO6hdu+WhofXtg0PR+0AUDAk91A7d0qzs9LoqGQW/Tk7y2IqgEKiWqYTO3eSzAGUQtCdu5ldZmY/NLMHzex9Mb+/xsyWzOy+xuft6YdaUNS+AyigtnfuZlaT9ClJl0p6TNJ3zexOdz/U1PUf3P1dPYixuKh9B1BQIXfuF0p60N0fcvdjkj4r6arehlUS1L4DKKiQ5L5F0qOrjh9rtDX7QzP7vpndbmbnxV3IzCbNbNHMFpeWlroIt2CofQdQUGlVy9wlaczdXyzpy5Jujevk7rPuPu7u4yMjIykNnSNq3wEUVEhyf1zS6jvxcxttT3P3n7v7/zUOb5K0I53wCo7adwAFFZLcvyvpeWZ2vpkNSXqjpDtXdzCzs1cdvlbS4fRCLDBq3wEUVNtqGXc/YWbvkvSvkmqS5tz9oJndIGnR3e+U9Gdm9lpJJyT9QtI1PYy5WKh9B1BAQXPu7n6Puz/f3X/T3Xc32j7YSOxy9/e7+3Z3v8Ddf8fdH+hl0F0LrUm/5JLoTvzU55JLWp8fek3q4QFkyd1z+ezYscMzNT/vPjzsLp3+DA9H7atNTKztc+qzbdv684eG3AcH218zdGwAaEPRjEnbHGtR3+yNj4/74uJidgOOjUUPGTUbHZUefvj0sVnysZqvGTo2ALRhZvvdfbxdv/7ZOCzLmvTma1IPDyBj/ZPcs6xJb74m9fAAMtY/yT20Jn1iIv78bdvWnz80JA0Otr8m9fAAMtY/yT20Jv3ee9cn+IkJ6eDB9efPzUl797a/JvXwADLWPwuqAFABLKgCQB/rr+Q+PS3V69HUSL0eHcc9sNTJA0c8nASggPpnWmZ6WtqzJ6yvWfSo0SnDw/Fz5M0v69ioLwCkIHRapn+Se70urax0f37cA0c8nAQgY8y5N0uS2KX4B454OAlAQfVPcq/Vkp0f98ARDycBKKj+Se6nXlwdonl/mVYPHPFwEoCC6p/kPjMjTU2dvoOv1aLjuAeWPv3psAeOeDgJQEH1z4IqAFRAdRdUQ+vK42rat29fW9O+fXu0P8zqtqEh6ayz1raddVZ0zS1b1rZv2cLLOgAUU8im7734dPWyjtCXXkxNxb9wI4sPL+sA0EOq5Ms6QuvKk9a0J8XLOgD0SDWnZULryvNM7BIv6wCQu3Il99C68qQ17Unxsg4AOStXcg+tK++kpj1tvKwDQAGUK7mH1pW3qmnftm1tv23b1r9JaXBQOvPMtW1nnhktg55zztr2c86R5ud5WQeAwinXgioA9LlqLqi2kqSGPO7cuHp4ACiRet4BJNa8p/qRI6fn3NtNe8Sd++Y3r+936FCU4A8eTC9uAOih8k/LJKkhb3VuKzn9XQHAKf0zLZOkhpw6cwAVVf7knqSGnDpzABVV/uSepIY87txWmssoAaDAyp/ck9SQx507Px9fD89iKoASCVpQNbPLJH1cUk3STe7+4abf/5qk2yTtkPRzSW9w94c3uiZ17gDQudQWVM2sJulTki6XtE3Sm8yseY7ibZL+291/S9LfSPqrzkMGAKQlZFrmQkkPuvtD7n5M0mclXdXU5ypJtzZ+vl3ShFnzi0gBAFkJSe5bJD266vixRltsH3c/IemopGenESAAoHOZLqia2aSZLZrZ4tLSUpZDA0BfCUnuj0s6b9XxuY222D5mVpe0SdHC6hruPuvu4+4+PjIy0l3EAIC2QpL7dyU9z8zON7MhSW+UdGdTnzsl/XHj59dJ+ornta8BACC4FPLVkj6mqBRyzt13m9kNil7UeqeZPUPSpyW9VNIvJL3R3R9qc80lSR1s7LLOZkk/S3B+kfBdiqlK30Wq1vfp5+8y6u5tpz5y2zgsKTNbDKn1LAO+SzFV6btI1fo+fJf2yv+EKgBgHZI7AFRQmZP7bN4BpIjvUkxV+i5Stb4P36WN0s65AwBaK/OdOwCghdIldzObM7MnzOwHeceSlJmdZ2ZfNbNDZnbQzN6dd0zdMrNnmNm/m9n3Gt/l+rxjSsrMamb2H2b2xbxjScLMHjaz+83sPjMr9VasZnammd1uZg+Y2WEze3neMXXDzF7Q+N/j1OdJM7su1THKNi1jZq+S9JSk29z9hXnHk4SZnS3pbHc/YGbPkrRf0tXufijn0DrW2Cjume7+lJkNSvqGpHe7+7dzDq1rZvYeSeOSfsPdr8w7nm6Z2cOSxt299HXhZnarpK+7+02NhyqH3f1/8o4ricbOu49L+m13T/Lszxqlu3N3968pelCq9Nz9J+5+oPHzLyUd1vpN2UrBI081Dgcbn3LdOaxiZudKukLSTXnHgoiZbZL0Kkk3S5K7Hyt7Ym+YkPSjNBO7VMLkXlVmNqboCd/v5BtJ9xrTGPdJekLSl929tN9F0RPZfyHpZN6BpMAl/ZuZ7TezybyDSeB8SUuS9jamy24ys2fmHVQK3ijpM2lflOReAGZ2hqQ7JF3n7k/mHU+33H3F3V+iaHO5C82slNNmZnalpCfcfX/esaTkle7+MkUv3HlnY2qzjOqSXiZpj7u/VNL/SnpfviEl05haeq2kf0r72iT3nDXmp++QtODun8s7njQ0/qn8VUmX5R1Ll14h6bWNuerPSvpdM5vPN6TuufvjjT+fkPR5RS/gKaPHJD226l+EtytK9mV2uaQD7v7TtC9Mcs9RYxHyZkmH3f2jeceThJmNmNmZjZ9/XdKlkh7IN6ruuPv73f1cdx9T9E/mr7j7m3MOqytm9szGYr0aUxi/J6mUlWbu/l+SHjWzFzSaJiSVrvigyZvUgykZKfpnTqmY2WckXSxps5k9JulD7n5zvlF17RWS/kjS/Y25akn6gLvfk2NM3Tpb0q2Nlf8BSf/o7qUuIayI50j6fOOtl3VJf+/u/5JvSIn8qaSFxnTGQ5KuzTmerjX+z/ZSSX/Sk+uXrRQSANAe0zIAUEEkdwCoIJI7AFQQyR0AKojkDgAVRHIHgAoiuQNABZHcAaCC/h9XoVDK3FAEIQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(X[y == 0, 0], X[y == 0, 1], color='r')\n",
    "plt.scatter(X[y == 1, 0], X[y == 1, 1], color='g')\n",
    "plt.scatter(X[y == 2, 0], X[y == 2, 1], color='b')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "def gini(y):\n",
    "    counter = Counter(y)\n",
    "    result = 0\n",
    "    for v in counter.values():\n",
    "        result += (v / len(y))**2\n",
    "    return 1 - result\n",
    "\n",
    "def cut(X, y, d, v):\n",
    "    left_index = (X[:, d] <= v)\n",
    "    right_index = (X[:, d] > v)\n",
    "    return X[left_index], X[right_index], y[left_index], y[right_index]\n",
    "\n",
    "def try_split(X, y):\n",
    "    best_g = 1\n",
    "    best_d = -1\n",
    "    best_v = -1\n",
    "    \n",
    "    for d in range(X.shape[1]):\n",
    "        sorted_index = np.argsort(X[:, d])\n",
    "        for i in range(len(X)-1):\n",
    "            if X[sorted_index[i], d] == X[sorted_index[i + 1], d]:\n",
    "                continue\n",
    "            \n",
    "            v = (X[sorted_index[i], d] + X[sorted_index[i + 1], d]) / 2\n",
    "            # print('d={}, v={}'.format(d, v))\n",
    "            X_left, X_right, y_left, y_right = cut(X, y, d, v)\n",
    "            \n",
    "            g_all = gini(y_left) + gini(y_right)\n",
    "            \n",
    "            # print('d={}, v={}, g={}'.format(d, v, g_all))\n",
    "            \n",
    "            if g_all < best_g:\n",
    "                best_g = g_all\n",
    "                best_d = d\n",
    "                best_v = v\n",
    "    return best_d, best_v, best_g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0, 1.9, 0.5)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "try_split(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Node():\n",
    "    def __init__(self, d=None, v=None, g=None, l=None):\n",
    "        self.dim = d\n",
    "        self.value = v\n",
    "        self.gini = g\n",
    "        self.label = l\n",
    "        \n",
    "        self.children_left = None\n",
    "        self.children_right = None\n",
    "    def __repr__(self):\n",
    "        return \"d={}, v={}, g={}, l={}\".format(self.dim, self.value, self.gini, self.label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_tree(X, y):\n",
    "    d, v, g = try_split(X, y)\n",
    "    if d == -1 or g == 0:     # 不能再进行划分时\n",
    "        return None\n",
    "\n",
    "    node = Node(d, v, g)\n",
    "    \n",
    "    X_left, X_right, y_left, y_right = cut(X, y, d, v)\n",
    "    \n",
    "    node.children_left = create_tree(X_left, y_left)\n",
    "    if node.children_left is None:\n",
    "        label = Counter(y_left).most_common(1)[0][0]\n",
    "        node.children_left = Node(l = label)\n",
    "    \n",
    "    node.children_right = create_tree(X_right, y_right)\n",
    "    if node.children_right is None:\n",
    "        label = Counter(y_right).most_common(1)[0][0]\n",
    "        node.children_right = Node(l = label)\n",
    "        \n",
    "    return node"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d=0, v=1.9, g=0.5, l=None"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tree = create_tree(X, y)\n",
    "tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d=None, v=None, g=None, l=0.0"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tree.children_left"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d=1, v=1.7, g=0.21057149006459386, l=None"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tree.children_right"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
